##############################################################################
##############################################################################
##############################################################################
##############################################################################
## Replication code for:
## Zhukov, "Repressed Memories" (JOP)
## ***
## WARNING: Execute run1_setup.R before running this file !!!
## ***
##############################################################################
##############################################################################
##############################################################################
##############################################################################

# Print upon starting
print("**** Starting run2_main.R ****")




##############################################################################
##############################################################################
##############################################################################
## Figure 2
## Volume of petitions over time
##############################################################################
##############################################################################
##############################################################################


rm(list=ls())

##################################
# Prepare data objects
##################################

# Load individual-level data 
X_msk <- readRDS("data/individuals/moscow_individuals.RDS") 
X_spb <- readRDS("data/individuals/leningrad_individuals.RDS") 
# Extract dates
msk_petitions <- X_msk[LA_ANY_2>0&!is.na(LA_DATE),LA_DATE%>%as.character()%>%as.Date(format = "%Y%m%d")%>%sort()]
spb_petitions <- X_spb[LA_ANY>0&!is.na(LA_DATE),LA_DATE%>%as.character()%>%as.Date(format = "%Y%m%d")%>%sort()]
msk_dr <- X_msk[LA_DENREMREQ_2>0&!is.na(LA_DATE),LA_DATE%>%as.character()%>%as.Date(format = "%Y%m%d")%>%sort()]
spb_dr <- X_spb[LA_DENREMREQ>0&!is.na(LA_DATE),LA_DATE%>%as.character()%>%as.Date(format = "%Y%m%d")%>%sort()]
msk_removals <- X_msk[LA_REMOVED_2>0&!is.na(LA_DATE),LA_DATE%>%as.character()%>%as.Date(format = "%Y%m%d")%>%sort()]
spb_removals <- X_spb[LA_REMOVED>0&!is.na(LA_DATE),LA_DATE%>%as.character()%>%as.Date(format = "%Y%m%d")%>%sort()]

# Proportion after 2022 invasion
mean(c(msk_petitions,spb_petitions)>"2022-02-24") 
mean(c(msk_dr,spb_dr)>"2022-02-24") 

##################################
# Make Figure 2a
##################################

w_ <- 6; h_ <- w_/1.618
png("results/fig2a.png",width=w_,height=h_,units="in",res=300)
par(mar=c(2,4,0.5,0.5))
hist(c(msk_petitions,spb_petitions), breaks = "years", freq = TRUE, xlab = "Date",xlim=c(as.Date("2014-01-01"),as.Date("2025-12-31")),xaxt="n",main="",ylim=pretty(table(c(msk_petitions,spb_petitions)%>%substr(1,4)))%>%range(),ylab="Petitions for Last Address plaques",col="black",density=20,angle=45)
axis(1,at=pretty(c(as.Date("2014-01-01"),as.Date("2025-04-30"))),labels=pretty(c(as.Date("2014-01-01"),as.Date("2025-04-30")))%>%substr(1,4))
abline(v=as.Date("2014-03-18"),lty=3,col="grey50")
text(x=as.Date("2014-03-18"),y=pretty(table(c(msk_petitions,spb_petitions)%>%substr(1,4)))%>%max(),labels="Crimea annexation",srt=90,pos=2,col="grey50")
abline(v=as.Date("2014-12-07"),lty=1,col="grey50")
text(x=as.Date("2014-12-07"),y=pretty(table(c(msk_petitions,spb_petitions)%>%substr(1,4)))%>%max(),labels="Start of Last Address project",srt=90,pos=2,col="grey50")
abline(v=as.Date("2022-02-24"),lty=2,col="grey50")
text(x=as.Date("2022-02-24")+210,y=pretty(table(c(msk_petitions,spb_petitions)%>%substr(1,4)))%>%max(),labels="Full-scale invasion of Ukraine",srt=90,pos=2,col="grey50")
dev.off()

##################################
# Make Figure 2n
##################################

png("results/fig2b.png",width=w_,height=h_,units="in",res=300)
par(mar=c(2,4,0.5,0.5))
hist(c(msk_dr,spb_dr), breaks = "years", freq = TRUE, xlab = "Date",xlim=c(as.Date("2014-01-01"),as.Date("2025-12-31")),xaxt="n",main="",ylim=pretty(table(c(msk_dr,spb_dr)%>%substr(1,4)))%>%range(),ylab="Denials and removals",col="black",density=20,angle=-45)
axis(1,at=pretty(c(as.Date("2014-01-01"),as.Date("2025-04-30"))),labels=pretty(c(as.Date("2014-01-01"),as.Date("2025-04-30")))%>%substr(1,4))
abline(v=as.Date("2014-03-18"),lty=3,col="grey50")
text(x=as.Date("2014-03-18"),y=pretty(table(c(msk_dr,spb_dr)%>%substr(1,4)))%>%max(),labels="Crimea annexation",srt=90,pos=2,col="grey50")
abline(v=as.Date("2014-12-07"),lty=1,col="grey50")
text(x=as.Date("2014-12-07"),y=pretty(table(c(msk_dr,spb_dr)%>%substr(1,4)))%>%max(),labels="Start of Last Address project",srt=90,pos=2,col="grey50")
abline(v=as.Date("2022-02-24"),lty=2,col="grey50")
text(x=as.Date("2022-02-24"),y=pretty(table(c(msk_dr,spb_dr)%>%substr(1,4)))%>%max(),labels="Full-scale invasion of Ukraine",srt=90,pos=2,col="grey50")
dev.off()


##############################################################################
##############################################################################
##############################################################################
## Figure 3
## Maps
##############################################################################
##############################################################################
##############################################################################


##################################
# Moscow (Figure 3)
##################################

rm(list=ls())

# Load geospatial data
moscow1938_districts <- sf::read_sf("data/blocks/moscow1938_districts.geojson")
moscow1938_blocks <- sf::read_sf("data/blocks/moscow1938_blocks.geojson")
moscow1938_blocks_dt <- moscow1938_blocks %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)]
bb <- moscow1938_blocks_dt[,sf::st_as_sf(geometry) %>% SUNGEO::update_bbox() %>% sf::st_bbox()]
suppressWarnings({
  suppressMessages({
    arrests <- readRDS("data/individuals/moscow_individuals.RDS") %>% sf::st_as_sf(na.fail=FALSE,remove=FALSE,crs=4326) %>% sf::st_intersection(bb%>%sf::st_as_sfc())
  })
})

# Figure a
w_ <- 3.5; h_ <- w_
png("results/fig3a.png",width=w_,height=h_,units="in",res=300)
par(mar=c(0,0,0,0))
plot(0,0,xlim=bb[c("xmin","xmax")],ylim=bb[c("ymin","ymax")],col=NA,bty="n",xaxt="n",yaxt="n")
plot(moscow1938_blocks["geometry"],col="grey80",border=NA,lty=1,add=TRUE,lwd=.5)
plot(moscow1938_districts["geometry"],col=NA,border="black",lty=1,add=TRUE,lwd=.5)
dev.off()

# Figure b
w_ <- 3.5; h_ <- w_
png("results/fig3b.png",width=w_,height=h_,units="in",res=300)
par(mar=c(0,0,0,0))
plot(moscow1938_blocks_dt[,geometry],xlim=bb[c("xmin","xmax")],ylim=bb[c("ymin","ymax")],col="grey80",border=NA,bty="n",xaxt="n",yaxt="n")
plot(arrests["geometry"],add=TRUE,col="black",pch=16,cex=.2)
dev.off()



##############################################################################
##############################################################################
##############################################################################
## Tables 1 & A2.3
## Block-level
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Custom functions
source("code/functions.R")

# Prepare data
X <- sf::read_sf("data/blocks/moscow1938_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)] 

##################################
# Estimate models
##################################

zonez <- paste0(grep("^ZONE_",names(X),value=TRUE),collapse="+")
tvarz <- c("N_EVENT_R","EVT_PCTR")
ols_mods <- lapply(1:length(tvarz),function(t0){
  suppressMessages({
    # Linear
    model_ols <- Formula::Formula(paste0("c(log(N_TOTAL_R+1),DENREMREQ_PCTR) ~ log(",tvarz[t0],"+1) + TROIKA_DIST  + ",zonez," + splines::bs(LONG)*splines::bs(LAT)|  RAY_NAME  ")%>%as.formula())
    out_ols <- fixest::feols(model_ols, data = X, weights = ~ wr, lean = TRUE, cluster = ~ RAY_NAME) ; fixest::etable(out_ols)
    ols_mods1 <- feols_coef(out_ols,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE"))
    # Binomial
    model_glm <- Formula::Formula(paste0("c(DENREMREQ_PCTR/100,DENREMREQ_PCTR/100) ~ log(",tvarz[t0],"+1) + TROIKA_DIST  + ",zonez," + splines::bs(LONG)*splines::bs(LAT)|  RAY_NAME  ")%>%as.formula())
    out_glm <- fixest::feglm(model_glm, data = X, weights = ~ wr, lean = FALSE, family="binomial", cluster = ~ RAY_NAME) ; fixest::etable(out_glm)
    glm_mods1 <- feols_coef(out_glm,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE"))  
    list(ols_mods1,glm_mods1)%>%data.table::rbindlist()%>%unique()
  })
})%>%data.table::rbindlist()

##################################
# Make tables
##################################

# Table 1
xtabz(modmat=ols_mods,yz = "TOTAL|DENREMREQ_PCTR",xz = "EVENT",modz = "feols|feglm",yordz = c("log(N_TOTAL_R + 1)","DENREMREQ_PCTR","DENREMREQ_PCTR/100"),xordz = c("log(N_EVENT_R + 1)","log(EVT_PCTR + 1)"),capz = "\\textbf{Severity of repression and memorialization}.",labbz = "tab:main",fit_width=TRUE,drop_stats="aic",subcaption="Estimates from Linear and Binomial fixed effect regression models. Treatment is number of city block residents executed (logged). Outcome is log-transformed in Linear model, rescaled as proportion between 0 and 1 in Binomial model. Robust standard errors in parentheses, clustered by rayon. All models include spatial spline and block-level covariates. Observations (blocks) weighted by population size. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.",fn="results/tab1.tex")

# Table A2.3
xtabz(modmat=ols_mods,yz = "TOTAL|DENREMREQ_PCTR",xz = "EVT_PCTR",modz = "feols|feglm",yordz = c("log(N_TOTAL_R + 1)","PLA_PCTR","DENREMREQ_PCTR","DENREMREQ_PCTR/100"),xordz = c("log(N_EVENT_R + 1)","log(EVT_PCTR + 1)"),capz = "\\textbf{Per capita repression and memorialization}.",labbz = "tab:main_pc",fit_width=TRUE,drop_stats="aic",subcaption="Estimates from Linear and Binomial fixed effect regression models. Treatment is percent of city block residents executed (logged). Outcome is log-transformed in Linear model, rescaled as proportion between 0 and 1 in Binomial model. Robust standard errors in parentheses, clustered by rayon. All models include spatial spline and block-level covariates. Observations (blocks) weighted by population size. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.",fn="results/tabA23.tex")


##############################################################################
##############################################################################
##############################################################################
## Figure 4
## Individual-level (neighbors)
##############################################################################
##############################################################################
##############################################################################


rm(list=ls())

##################################
# Prepare environment
##################################

# Load data
X <- readRDS("data/individuals/moscow_individuals.RDS") 

##################################
# Estimate models
##################################

# Model specification
yvarz <- c("LA_ANY_2","LA_DENREMREQ_2")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_ARREST"
xlab_<-"Neighbors executed"
suppressMessages({
  formz <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1) + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
})

# Fit base model
out_ <- fixest::feglm(formz,data=X ,family="binomial",cluster=~RESI_ID,nthreads=1); fixest::etable(out_)

# Set parameters
n_sim <- 1000
x_len <- 1000
xz <- X[,seq(0,max(get(tvar)),length.out=x_len)]

# Non-parametric bootstrap
{
  pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
  set.seed(123); y_predz <- lapply(1:n_sim,function(b0){
    setTxtProgressBar(pb, b0)
    tryCatch({
    suppressMessages({
      out_bs <- fixest::feglm(formz,data=X %>% .[sample(.N,replace=TRUE)],family="binomial",cluster=~RESI_ID,nthreads=1)
    })
    X_new <- X %>% .[,.SD,.SDcols=out_[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
    yz_pred_bs_1 <- predict(out_bs[[1]],newdata=X_new,type="response")
    yz_pred_bs_2 <- predict(out_bs[[2]],newdata=X_new,type="response")
    list(LA_ANY=yz_pred_bs_1,LA_DENREMREQ=yz_pred_bs_2)
    },error=function(e){print(e)})
  }) 
  suppressMessages({
    y_predz_1 <- y_predz %>% .[lapply(y_predz,length)==2] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2 <- y_predz %>% .[lapply(y_predz,length)==2] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
  })
}

##################################
# Make figures
##################################

# Log scale
{
  x_ <- log(xz+1)
  xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
  atz <- log(xlabz+1)
  rugz <- X[,log(get(tvar))]
}
w_ <- 6; h_ <- w_/1.618

# Figure a
png("results/fig4a.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_1[,apply(.SD,1,median)],ylim=range(pretty(quantile(as.matrix(y_predz_1),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab=xlab_,xaxt="n",xlim=c(0,max(x_)),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col="grey90")
  lines(x=x_,y=y_predz_1[,apply(.SD,1,median)],lwd=1.618)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()

# Figure b
png("results/fig4b.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_2[,apply(.SD,1,median)],ylim=range(pretty(quantile(as.matrix(y_predz_2),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab=xlab_,xaxt="n",xlim=c(0,max(x_)),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col="grey90")
  lines(x=x_,y=y_predz_2[,apply(.SD,1,median)],lwd=1.618)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()



##############################################################################
##############################################################################
##############################################################################
## Figure 5
## Individual-level (coworkers)
##############################################################################
##############################################################################
##############################################################################


rm(list=ls())

##################################
# Prepare environment
##################################

# Load data
X <- readRDS("data/individuals/moscow_individuals.RDS") 

##################################
# Estimate models
##################################

# Model specification
yvarz <- c("LA_ANY_2","LA_DENREMREQ_2")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_ARREST_WORK"
xlab_<-"Co-workers executed"
suppressMessages({
  formz <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1) + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
})

# Fit base model
out_ <- fixest::feglm(formz,data=X,family="binomial",cluster=~RESI_ID,nthreads=1); fixest::etable(out_)

# Set parameters
n_sim <- 1000
x_len <- 1000
xz <- X[,seq(0,max(get(tvar),na.rm=TRUE),length.out=x_len)]

# Non-parametric bootstrap
{
  pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
  set.seed(123); y_predz <- lapply(1:n_sim,function(b0){
    setTxtProgressBar(pb, b0)
    tryCatch({
    suppressMessages({
      out_bs <- fixest::feglm(formz,data=X %>% .[sample(.N,replace=TRUE)],family="binomial",cluster=~RESI_ID,nthreads=1)
    })
    X_new <- X %>% .[,.SD,.SDcols=out_[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
    yz_pred_bs_1 <- predict(out_bs[[1]],newdata=X_new,type="response")
    yz_pred_bs_2 <- predict(out_bs[[2]],newdata=X_new,type="response")
    list(LA_ANY=yz_pred_bs_1,LA_DENREMREQ=yz_pred_bs_2)
    },error=function(e){print(e)})
  }) 
  suppressMessages({
    y_predz_1 <- y_predz %>% .[lapply(y_predz,length)==2] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2 <- y_predz %>% .[lapply(y_predz,length)==2] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
  })
}

##################################
# Make figures
##################################

# Log scale
{
  x_ <- log(xz+1)
  xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
  atz <- log(xlabz+1)
  rugz <- X[,log(get(tvar))]
}
w_ <- 6; h_ <- w_/1.618

# Figure a
png("results/fig5a.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_1[,apply(.SD,1,median)],ylim=range(pretty(quantile(as.matrix(y_predz_1),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab=xlab_,xaxt="n",xlim=c(0,max(x_)),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col="grey90")
  lines(x=x_,y=y_predz_1[,apply(.SD,1,median)],lwd=1.618)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()

# Figure b
png("results/fig5b.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_2[,apply(.SD,1,median)],ylim=range(pretty(quantile(as.matrix(y_predz_2),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab=xlab_,xaxt="n",xlim=c(0,max(x_)),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col="grey90")
  lines(x=x_,y=y_predz_2[,apply(.SD,1,median)],lwd=1.618)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()




##############################################################################
##############################################################################
##############################################################################
## Figure 6 & A3.8
## Victim identity
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Load data
X <- readRDS("data/individuals/moscow_individuals.RDS")


##################################
# Model estimation and bootstrap
##################################

# Model specification
yvarz <- c("LA_ANY_2","LA_DENREMREQ_2")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_ARREST" 

# Set parameters
n_sim <- 1000
x_len <- 1000
# ii0 <- 1

# Loop over interactions
for(ii0 in 1:2){

  # Parametric bootstrap SE
  {

    ixvar <- c("eth_nonrus","born_foreign")[ii0]
    fevar <- "nationality_short"

    # GLM
    suppressMessages({
      formz_base <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
    })
    out_base <- fixest::feglm(formz_base,data=X,family="binomial",cluster=~RESI_ID); fixest::etable(out_base)

    # FE values
    fez <- data.table(nat=fixest::fixef(out_base[[1]])[[fevar]]%>%names(),fe1=fixest::fixef(out_base[[1]])[[fevar]]) %>% .[,fe2:=match(nat,names(fixest::fixef(out_base[[2]])[[fevar]]))%>%fixest::fixef(out_base[[2]])[[fevar]][.]]%>%.[nat!=""]
    ix_list <- c(ixvar=ixvar,ixcon0_1="рус",ixcon1_1=fez[which.min(fe1-fe1[nat=="рус"]),nat],ixcon0_2="рус",ixcon1_2=fez[which.max(fe2-fe2[nat=="рус"]),nat],lab0=c("Russian victim","Native-born victim")[ii0],lab1=c("Non-Russian victim","Foreign-born victim")[ii0],fevar=fevar)

    # Get interaction variables
    ixcon0_1 <- ix_list["ixcon0_1"]
    ixcon1_1 <- ix_list["ixcon1_1"]
    ixcon0_2 <- ix_list["ixcon0_2"]
    ixcon1_2 <- ix_list["ixcon1_2"]
    lab0 <- ix_list["lab0"]
    lab1 <- ix_list["lab1"]
    
    # Hypothetical x values
    xz <- X[,seq(0,max(get(tvar)),length.out=x_len)]
    X_new <- X %>% .[,.SD,.SDcols=out_base[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
    # Generate predictions, sampling coefficients from multivariate Normal distro
    pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
    set.seed(123); y_predz <- lapply(1:n_sim,function(b0){
      setTxtProgressBar(pb, b0)
      out_copy <- data.table::copy(out_base)
      out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma=out_copy[[1]]%>%.[["cov.scaled"]])%>%apply(2,mean)
      out_copy[[2]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[2]][["coefficients"]]%>%unlist(),sigma=out_copy[[2]]%>%.[["cov.scaled"]])%>%apply(2,mean)
      yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=0]%>%.[,eval(fevar):=ixcon0_1],type="response")
      yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=1]%>%.[,eval(fevar):=ixcon1_1],type="response")
      yz_pred_bs_2_0 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=0]%>%.[,eval(fevar):=ixcon0_2],type="response")
      yz_pred_bs_2_1 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=1]%>%.[,eval(fevar):=ixcon1_2],type="response")
      rm(out_copy)
      list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
    }) 
    suppressMessages({
      y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    })
  }

  # Make Figure a
  {
    x_ <- log(xz+1)
    xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
    atz <- log(xlabz+1)
    rugz <- X[,log(N_ARREST)]
  }
  w_ <- 6; h_ <- w_/1.618

  png(paste0("results/fig",ifelse(grepl("eth",ixvar),"6","A38"),"a.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.1)
    })
    legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
  }
  dev.off()

  # Make Figure b
  png(paste0("results/fig",ifelse(grepl("eth",ixvar),"6","A38"),"b.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1),na.rm=TRUE))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.1)
    })
  }
  dev.off()

}




##############################################################################
##############################################################################
##############################################################################
## Figures 7, A3.9, A3.10, A3.11
## Victim culpability
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Load data
X <- readRDS("data/individuals/moscow_individuals.RDS")


##################################
# Model estimation and bootstrap
##################################

# Model specification
yvarz <- c("LA_ANY_2","LA_DENREMREQ_2")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_ARREST" 

# Set parameters
n_sim <- 1000
x_len <- 1000
# ii0 <- 1

# Loop over interactions
for(ii0 in 1:4){

  # Parametric bootstrap SE
  {

    ixvar <- c("party_tsk","party_mem","party_mcy","nkvd_strict")[ii0]
    fevar <- "party"

     # GLM
    suppressMessages({
      formz_base <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
    })
    out_base <- fixest::feglm(formz_base,data=X,family="binomial",cluster=~RESI_ID); fixest::etable(out_base)
    
    # FE values
    fez <- data.table(name=fixest::fixef(out_base[[1]])[[fevar]]%>%names(),fe1=fixest::fixef(out_base[[1]])[[fevar]]) %>% .[,fe2:=match(name,names(fixest::fixef(out_base[[2]])[[fevar]]))%>%fixest::fixef(out_base[[2]])[[fevar]][.]]#%>%.[nat!=""]
    ix_list <- c(ixvar=ixvar,ixcon0_1="none",ixcon1_1="member",ixcon0_2="none",ixcon1_2="member",lab0=c("Other victims","Other victims","Other victims","Victim didn't work in NKVD")[ii0],lab1=c("Party nomenklatura","Communist Party member","Party member/candidate/youth","Victim worked in NKVD")[ii0],fevar=fevar)

    # Get interaction variables
    ixcon0_1 <- ix_list["ixcon0_1"]
    ixcon1_1 <- ix_list["ixcon1_1"]
    ixcon0_2 <- ix_list["ixcon0_2"]
    ixcon1_2 <- ix_list["ixcon1_2"]
    lab0 <- ix_list["lab0"]
    lab1 <- ix_list["lab1"]
    
    # Hypothetical x values
    xz <- X[,seq(0,max(get(tvar)),length.out=x_len)]
    X_new <- X %>% .[,.SD,.SDcols=out_base[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
    # Generate predictions, sampling coefficients from multivariate Normal distro
    pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
    set.seed(123); y_predz <- lapply(1:n_sim,function(b0){
      setTxtProgressBar(pb, b0)
      out_copy <- data.table::copy(out_base)
      out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma=out_copy[[1]]%>%.[["cov.scaled"]])%>%apply(2,mean)
      out_copy[[2]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[2]][["coefficients"]]%>%unlist(),sigma=out_copy[[2]]%>%.[["cov.scaled"]])%>%apply(2,mean)
      yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=0]%>%.[,eval(fevar):=ixcon0_1],type="response")
      yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=1]%>%.[,eval(fevar):=ixcon1_1],type="response")
      yz_pred_bs_2_0 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=0]%>%.[,eval(fevar):=ixcon0_2],type="response")
      yz_pred_bs_2_1 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=1]%>%.[,eval(fevar):=ixcon1_2],type="response")
      rm(out_copy)
      list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
    }) 
    suppressMessages({
      y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    })
  }

  # Make Figure a
  {
    x_ <- log(xz+1)
    xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
    atz <- log(xlabz+1)
    rugz <- X[,log(N_ARREST)]
  }
  w_ <- 6; h_ <- w_/1.618

  png(paste0("results/fig",c("7","A39","A310","A311")[ii0],"a.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.1)
    })
    legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
  }
  dev.off()

  # Make Figure b
  png(paste0("results/fig",c("7","A39","A310","A311")[ii0],"b.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1),na.rm=TRUE))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.1)
    })
  }
  dev.off()

}




##############################################################################
##############################################################################
##############################################################################
## Figures 8, A3.12
## Victim social status (education, managerial class)
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Load data
X <- readRDS("data/individuals/moscow_individuals.RDS")

##################################
# Model estimation and bootstrap
##################################

# Model specification
yvarz <- c("LA_ANY_2","LA_DENREMREQ_2")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_ARREST" 

# Set parameters
n_sim <- 1000
x_len <- 1000
# ii0 <- 1

# Loop over interactions
for(ii0 in 1:2){

  # Parametric bootstrap SE
  {

    ixvar <- c("educ_higher","manager")[ii0]
    fevar <- "nationality_short"

    # GLM
    suppressMessages({
      formz_base <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + dob_year + male + manager + p_military + p_clergy +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
    })
    out_base <- fixest::feglm(formz_base,data=X,family="binomial",cluster=~RESI_ID); fixest::etable(out_base)

    # FE values
    fez <- data.table(nat=fixest::fixef(out_base[[1]])[[fevar]]%>%names(),fe1=fixest::fixef(out_base[[1]])[[fevar]]) %>% .[,fe2:=match(nat,names(fixest::fixef(out_base[[2]])[[fevar]]))%>%fixest::fixef(out_base[[2]])[[fevar]][.]]%>%.[nat!=""]
    ix_list <- c(ixvar=ixvar,ixcon0_1="рус",ixcon1_1=fez[which.min(fe1-fe1[nat=="рус"]),nat],ixcon0_2="рус",ixcon1_2=fez[which.max(fe2-fe2[nat=="рус"]),nat],lab0=c("Secondary education or less","Other victims")[ii0],lab1=c("University education","Professional-managerial class")[ii0],fevar=fevar)

    # Get interaction variables
    ixcon0_1 <- ix_list["ixcon0_1"]
    ixcon1_1 <- ix_list["ixcon1_1"]
    ixcon0_2 <- ix_list["ixcon0_2"]
    ixcon1_2 <- ix_list["ixcon1_2"]
    lab0 <- ix_list["lab0"]
    lab1 <- ix_list["lab1"]
    
    # Hypothetical x values
    xz <- X[,seq(0,max(get(tvar)),length.out=x_len)]
    X_new <- X %>% .[,.SD,.SDcols=out_base[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
    # Generate predictions, sampling coefficients from multivariate Normal distro
    pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
    set.seed(123); y_predz <- lapply(1:n_sim,function(b0){#print(b0)
      setTxtProgressBar(pb, b0)
      out_copy <- data.table::copy(out_base)
      out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma=out_copy[[1]]%>%.[["cov.scaled"]])%>%apply(2,median)
      out_copy[[2]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[2]][["coefficients"]]%>%unlist(),sigma=out_copy[[2]]%>%.[["cov.scaled"]])%>%apply(2,median)
      yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=0],type="response")
      yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=1],type="response")
      yz_pred_bs_2_0 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=0],type="response")
      yz_pred_bs_2_1 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=1],type="response")
      rm(out_copy)
      list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
    }) 
    suppressMessages({
      y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    })
  }

  # Make Figure a
  {
    x_ <- log(xz+1)
    xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
    atz <- log(xlabz+1)
    rugz <- X[,log(N_ARREST)]
  }
  w_ <- 6; h_ <- w_/1.618

  png(paste0("results/fig",c("8","A312")[ii0],"a.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.1)
    })
    legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
  }

  dev.off()

  # Make Figure b
  png(paste0("results/fig",c("8","A312")[ii0],"b.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1),na.rm=TRUE))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.1)
    })
  }
  dev.off()

}





##############################################################################
##############################################################################
##############################################################################
## Figures 9, A3.13
## Local political opportunity structure
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Load data
suppressWarnings({
  X <- sf::read_sf("data/blocks/moscow1938_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)] %>%  .[,paste0(grep("^pbl_",names(.),value=TRUE),"_b"):=lapply(.SD,function(x){1*(x>0)}),.SDcols=grep("^pbl",names(.))] %>% .[,c("long","lat"):=geometry%>%sf::st_centroid()%>%sf::st_coordinates()%>%data.table::as.data.table()]
})


##################################
# Model estimation and bootstrap
##################################

# Model specification
yvarz <- c("N_TOTAL_R","DENREMREQ_PCTR")
tvar <- "N_EVENT_R"
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
ixvarz <- X %>% names(.) %>% grep("^pbl_sec_p3|^pbl_pos_p3",.,value=TRUE) %>% grep("_b$",.,value=TRUE) #%>% paste0("I(",.,"*100)")
ix_list <- lapply(ixvarz,function(ix){c(ixvar=ix,lab0="No state security presence",lab1="State security presence",xlab_="Number of city block residents executed")}) 

# Set parameters
n_sim <- 1000
x_len <- 1000

# Loop over interactions
for(ii0 in 1:2){

  # Parametric bootstrap SE
  {

    ixvar <- ixvarz[ii0]

    # Model 1
    suppressMessages({
      formz_base <- Formula::Formula(formula(paste0("c(log(",yvarz[1],"+1),log(",yvarz[2],"+1)) ~ log(",tvar,"+1)*",ixvar," + TROIKA_DIST + ", zonez, "+ splines::bs(LONG)*splines::bs(LAT) | RAY_NAME")))
    })
    out_base <- fixest::feglm(formz_base, data = X, weights = ~ wr, lean = FALSE, se = "HC1", family="gaussian") ; fixest::etable(out_base)
    # Model 2
    suppressMessages({
      formz_base2 <- Formula::Formula(paste0("c(",paste0(yvarz[2],"",collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + TROIKA_DIST  + ",zonez," + splines::ns(LONG,2)*splines::ns(LAT,2)|  RAY_NAME  ")%>%as.formula())
    })
    out_base2 <- fixest::feglm(formz_base2, data = X, weights = ~ wr, lean = FALSE, family="gaussian", vcov = "HC1") ; fixest::etable(out_base2)

    # Hypothetical x values
    xz <- X[,seq(min(N_EVENT_R,na.rm=TRUE),max(N_EVENT_R,na.rm=TRUE),length.out=x_len)]
    X_new <- X %>% .[,.SD,.SDcols=out_base[[1]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100|\\*100|100\\)|\\,\\ \\d{1}|c\\(|splines\\:\\:bs\\(|splines\\:\\:ns\\(|\\)|^\\~","",.)%>%stringr::str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[,ZONE_GOVE:=1] %>% .[rep(1,x_len)]  %>% .[,c("N_EVENT","N_EVENT_R"):=xz]
    # Generate predictions, sampling coefficients from multivariate Normal distro
    pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
    set.seed(123); y_predz <- lapply(1:n_sim,function(b0){#print(b0)
      setTxtProgressBar(pb, b0)
      # Sample coefficients
      out_copy <- data.table::copy(out_base)
      out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=100,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma=out_copy[[1]]%>%.[["cov.scaled"]])%>%apply(2,median)
      out_copy2 <- data.table::copy(out_base2)
      out_copy2[["coefficients"]] <- mvtnorm::rmvnorm(n=100,mean=out_copy2[["coefficients"]]%>%unlist(),sigma=out_copy2[["cov.scaled"]],method="svd")%>%apply(2,mean)
      # Predictions
      yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new %>% .[,eval(ixvar):=0],type="response") %>% (function(x){exp(x)-1})
      yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new %>% .[,eval(ixvar):=1],type="response") %>% (function(x){exp(x)-1})
      yz_pred_bs_2_0 <- predict(out_copy2,newdata=X_new %>% .[,eval(ixvar):=0],type="response")
      yz_pred_bs_2_1 <- predict(out_copy2,newdata=X_new %>% .[,eval(ixvar):=1],type="response")
      rm(out_copy)
      list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
    }) 
    suppressMessages({
      y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
      y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    })
  }

  # Make Figure a
  {
    x_ <- log(xz+1)
    xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
    atz <- log(xlabz+1)
    rugz <- X[,log(get(tvar))]
    lab0 <- ix_list[[ii0]]["lab0"]
    lab1 <- ix_list[[ii0]]["lab1"]
  }
  w_ <- 6; h_ <- w_/1.618

  png(paste0("results/fig",c("9","A313")[ii0],"a.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)%>%unlist()%>%.[.>0]),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Number of city block residents executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.25)
    })
    legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
  }
  dev.off()

  # Make Figure b
  png(paste0("results/fig",c("9","A313")[ii0],"b.png"),width=w_,height=h_,units="in",res=300)
  {
    par(mar=c(4,3,0.5,0.5))
    plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1))))%>%(function(x){x[x<0]<-0;x[x>150]<-150;x}),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Number of city block residents executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
    lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
    polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
    lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
    axis(2,las=2)
    axis(1,at=atz,labels=xlabz)
    suppressWarnings({
      rug(jitter(rugz,amount=.25),lwd=.25)
    })
  }
  dev.off()

}




##############################################################################
##############################################################################
##############################################################################
## Table 2
## Government-sanctioned memorials
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# # Custom functions
source("code/functions.R")

# Load data
X <- sf::read_sf("data/blocks/moscow1938_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)]

##################################
# Estimate models
##################################

# Model specification
zonez <- paste0(grep("^ZONE_",names(X),value=TRUE),collapse="+")
tvarz <- c("N_EVENT_R","EVT_PCTR")
yvarz <- X %>% names() %>% grep("N_PLAQO_Science_Sov_R|N_PLAQO_Party_Sov_R|N_PLAQO_Military_Sov_R|N_PLAQO_Government_Sov_R|N_PLAQO_Arts_Sov_R|N_PLAQO_Medicine_Sov_R|N_PLAQO_Sports_Sov_R",.,value=TRUE)

# Estimate models
plaqo_mods <- lapply(1:length(tvarz),function(t0){
  suppressMessages({
    # Linear
    model_ols <- Formula::Formula(paste0("c(",paste0("log(",yvarz,"+1)",collapse=","),") ~ log(",tvarz[t0],"+1) + TROIKA_DIST  + ",zonez," + splines::bs(LONG)*splines::bs(LAT)|  RAY_NAME")%>%as.formula())
    out_ols <- fixest::feols(model_ols, data = X, weights = ~ wr, lean = TRUE, cluster = ~ RAY_NAME) ; fixest::etable(out_ols)
    ols_mods1 <- feols_coef(out_ols,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE"))%>%unique()
    ols_mods1
  })
})%>%data.table::rbindlist()

# Absolute numbers
suppressWarnings({
  xtabz(modmat=plaqo_mods,yz = "N_PLAQO_Science_Sov_R|N_PLAQO_Party_Sov_R|N_PLAQO_Military_Sov_R|N_PLAQO_Government_Sov_R|N_PLAQO_Arts_Sov_R|N_PLAQO_Medicine_Sov_R|N_PLAQO_Sports_Sov_R",xz = "EVENT",modz = "feols|feglm",yordz = c("log(N_PLAQO_Military_Sov_R + 1)","log(N_PLAQO_Party_Sov_R + 1)","log(N_PLAQO_Government_Sov_R + 1)","log(N_PLAQO_Arts_Sov_R + 1)","log(N_PLAQO_Science_Sov_R + 1)","log(N_PLAQO_Medicine_Sov_R + 1)","log(N_PLAQO_Sports_Sov_R + 1)"),xordz = c("log(N_EVENT_R + 1)","log(EVT_PCTR + 1)"),capz = "\\textbf{Severity of repression and government-sanctioned memorialization}.",labbz = "tab:plaqo",fit_width=TRUE,drop_stats="aic",subcaption="Estimates from linear fixed effect models. Treatment is number of block residents executed (logged). Outcome is post-1938 memorial plaques per block (logged). See note under Table \\ref{tab:main} for other details.",fn="results/tab2.tex")
})




##############################################################################
##############################################################################
##############################################################################
## Tables A2.1, A2.4
## Spatial autocorrelation
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Custom functions
source("code/functions.R")

# Prepare data
X <- sf::read_sf("data/blocks/moscow1938_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)] 
X_sf <- X %>% sf::st_as_sf()

##################################
# Estimate models
##################################

# Specify models
zonez <- paste0(grep("^ZONE_",names(X),value=TRUE),collapse="+")
tvarz <- c("N_EVENT_R","EVT_PCTR")

# Output for tables
scar_mods <- parallel::mclapply(1:length(tvarz),function(t0){
  yvarz <- c("N_TOTAL_R","DENREMREQ_PCTR")
  scar_mods1 <- lapply(1:length(yvarz),function(y0){print(paste(yvarz[y0],tvarz[t0]))
    out_car <- spmodel::spautor(model_scar(y=yvarz[y0],x=tvarz[t0],logy=TRUE),data=X_sf,spcov_type="car", weights = ~ wr); summary(out_car)
    out_sar <- spmodel::spautor(model_scar(y=yvarz[y0],x=tvarz[t0],logy=TRUE),data=X_sf,spcov_type="sar", weights = ~ wr); summary(out_sar)
    list(
      scar_coef(out_car,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE")),
      scar_coef(out_sar,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE"))
      ) %>% data.table::rbindlist()
  }) %>% data.table::rbindlist()
  yvarz <- c("DENREMREQ_PCTR","PLA_PCTR")
  scar_mods2 <- lapply(1:length(yvarz),function(y0){print(paste(yvarz[y0],tvarz[t0]))
    out_car <- spmodel::spautor(model_scar(y=yvarz[y0],x=tvarz[t0],logy=FALSE),data=X_sf,spcov_type="car", weights = ~ wr); summary(out_car)
    out_sar <- spmodel::spautor(model_scar(y=yvarz[y0],x=tvarz[t0],logy=FALSE),data=X_sf,spcov_type="sar", weights = ~ wr); summary(out_sar)
    list(
      scar_coef(out_car,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE")),
      scar_coef(out_sar,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE"))
      ) %>% data.table::rbindlist()
  }) %>% data.table::rbindlist()
  list(scar_mods1,scar_mods2)%>%data.table::rbindlist()
},mc.cores=parallel::detectCores()/4) %>% data.table::rbindlist()


##################################
# Make tables
##################################

# Make Table A2.1
xtabz(modmat=scar_mods,yz = "TOTAL|DENREMREQ_PCTR$",xz = "EVENT",modz = "car|sar",yordz = c("log(N_TOTAL_R + 1)","DENREMREQ_PCTR"),xordz = c("log(N_EVENT_R + 1)","log(EVT_PCTR + 1)"),capz = "\\textbf{Severity of repression and memorialization}, spatial models.",labbz = "tab:scar",fit_width=TRUE,subcaption="Estimates from model in Equation \\ref{eq:scar} with conditional (CAR) and simultaneous autoregressive (SAR) covariance functions. Observations (blocks) weighted by population size. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.",fn="results/tabA21.tex")

# Make Table A2.4
xtabz(modmat=scar_mods,yz = "TOTAL|DENREMREQ_PCTR$",xz = "EVT_PCTR",modz = "car|sar",yordz = c("log(N_TOTAL_R + 1)","DENREMREQ_PCTR"),xordz = c("log(N_EVENT_R + 1)","log(EVT_PCTR + 1)"),capz = "\\textbf{Per capita repression and memorialization}, spatial models.",labbz = "tab:scar_pc",fit_width=TRUE,subcaption="Treatment is percent of city block residents executed (logged). See note under Table \\ref{tab:scar} for details.",fn="results/tabA24.tex")





##############################################################################
##############################################################################
##############################################################################
## Figures A2.5, A2.6, A2.7
## Tables A2.2, A2.5
## FRDD
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Custom functions
source("code/functions.R")

# Load data
moscow1938_districts <- sf::read_sf("data/blocks/moscow1938_districts.geojson")
X <- sf::read_sf("data/blocks/moscow1938_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)] 

##################################
# First stage
##################################

# Absolute basis
cutoff_m <- 100
tvar <- "N_ARREST_R"

# First stage model
suppressMessages({
  mod_rdd_1 <- Formula::as.Formula(as.formula(paste0("log(y + 1) ~ log(RAYPOP39) + AREA_RAYR + TROIKA_DIST_bar + DIST2INDU")))
})
varz <- mod_rdd_1 %>% as.character() %>% stringr::str_split(" \\+ | \\~ ") %>% sapply(function(x){gsub("log\\(|1\\)|\\)|\\~","",x)})%>%unlist()%>%.[.!=""]

# RDD sample
g <- X %>% data.table::copy() %>% .[,paste0(c("KREMLIN_DIST","TROIKA_DIST",grep("^ZONE_",names(.),value=TRUE)),"_bar"):=lapply(.SD,mean,na.rm=TRUE),.SDcols=c("KREMLIN_DIST","TROIKA_DIST",grep("^ZONE_",names(.),value=TRUE)),by=RAY_NAME] %>% .[,y:=get(tvar)] %>% .[,drop:=lapply(.SD,is.na)%>%as.data.frame()%>%apply(1,sum)%>%(function(x){(x>0)}),.SDcols=varz] %>% .[drop==FALSE] %>% .[,drop:=NULL] %>% .[, `:=`(e, residuals(lfe::felm(mod_rdd_1, data = .)))] %>% .[.[, .(e = mean(e)), by = RAY_NAME], on = "RAY_NAME", `:=`(e.inside, i.e)] %>% .[.[, .(e = mean(e)), by = RAY_NAME], on = c(nearest_RAY_NAME = "RAY_NAME"), `:=`(e.outside, i.e)] %>% .[abs(e.inside - e.outside) > {.[, .(e=mean(e)), by = RAY_NAME] %>% .[,e] %>% sd(na.rm = TRUE) %>% (function(x){x})}] %>% .[abs(distance_RAY_NAME) < cutoff_m] %>% .[, `:=`(d_rayon, distance_RAY_NAME * I(e.inside > e.outside) - distance_RAY_NAME * I(e.inside <= e.outside))] %>% .[,colz := c("white")] %>% .[d_rayon>=0,colz:="black"] %>% .[,pch:=1] %>% .[d_rayon>=0,pch:=16]
bb <- g[,sf::st_as_sf(geometry) %>% SUNGEO::update_bbox() %>% sf::st_bbox()]
X2 <- g[order(d_rayon)]

###
# Make Figure A2.5 (map)
###
w_ <- 5; h_ <- w_
png("results/figA25.png",width=w_,height=h_,units="in",res=300)
par(mar=c(0,0,0,0))
plot(X[,geometry],xlim=bb[c("xmin","xmax")],ylim=bb[c("ymin","ymax")],border=NA,col="grey90")
plot(g[,geometry],add=TRUE,border="black",col=g[,colz])
plot(moscow1938_districts["geometry"],col=NA,border="black",lty=3,add=TRUE)
legend(x="bottomleft",fill=c("black","white"),legend=c(paste0("Treated (N=",g[,.N,by=colz][1,N],")"),paste0("Control (N=",g[,.N,by=colz][2,N],")")),box.col=rgb(1,1,1,.5))
dev.off()

###
# Make Figure A2.6 (discontinuity)
###
r_plot <- rdrobust::rdplot(y = log(1 + g$y), x = g$d_rayon, p=4,col.lines = "black", col.dots = "grey", kernel="uniform",weights=g$wr, hide=TRUE)%>%.[[4]]%>%data.table::as.data.table()
yvalz <- X2[,c(1,2,5,10,25,50,100,250,500,pretty(N_ARREST_R))%>%sort()]
w_ <- 6.5; h_ <- w_/1.618
png("results/figA26.png",width=w_,height=h_,res=300,units="in")
par(mar=c(4,4,0,0))
plot(x=X2[,d_rayon],y=X2[,log(get(tvar)+1)],pch=X2[,pch],bty="n",xlab="Distance to border (meters)",yaxt="n",ylab="Arrests per block",cex.lab=1.618)
lines(x=r_plot[rdplot_x<0,rdplot_x],y=r_plot[rdplot_x<0,rdplot_y])
lines(x=r_plot[rdplot_x>0,rdplot_x],y=r_plot[rdplot_x>0,rdplot_y])
axis(2,at=log(yvalz+1),labels=yvalz,las=1)
segments(x0=X2[,min(pretty(d_rayon))],x1=X2[,max(pretty(d_rayon))],y0=log(yvalz+1),y1=log(yvalz+1),lty=3,lwd=1/1.618)
dev.off()

# Bias-corrected local polynomial estimate (footnote 3)
rdrobust::rdrobust(y = log(1 + g$y), x = g$d_rayon, cluster = g$RAY_NAME) %>% summary()
rdrobust::rdrobust(y = log(1 + g$y), x = g$d_rayon, cluster = g$RAY_NAME) %>% .["coef"]
rdrobust::rdrobust(y = log(1 + g$y), x = g$d_rayon, cluster = g$RAY_NAME) %>% .["se"]
rdrobust::rdrobust(y = log(1 + g$y), x = g$d_rayon, cluster = g$RAY_NAME) %>% .["pv"]

# Balance tests for discontinuity at the border
rout <- function(x){c(x$coef[3], x$ci[3,])}
zonez_ <- g %>% .[,which(lapply(.SD,sum)>1)%>%names(),.SDcols=grep("^ZONE_",names(.),value=TRUE)%>%.[!grepl("_bar$",.)]]
distz_ <- g %>% .[,which(lapply(.SD,sum)>1)%>%names(),.SDcols=grep("^DIST2",names(.),value=TRUE)%>%.[!grepl("_bar$",.)]]
G <- g %>% data.table::copy() %>% 
  .[,.SD,.SDcols=c("d_rayon","TROIKA_DIST","KREMLIN_DIST","RAY_NAME","N_EVENT_R","N_ARREST_R","ARR_PCTR","LONG","LAT","POP_WR",distz_)] %>%
  .[,ln_N_ARREST_R:=log(N_ARREST_R+1)] %>% 
  .[,ln_N_EVENT_R:=log(N_EVENT_R+1)] %>% 
  .[, lapply(.SD, function(x){rdrobust::rdrobust(y = scale(x), x = d_rayon, cluster = RAY_NAME) %>% rout()}), .SDcols = c("ln_N_EVENT_R","LONG","LAT","POP_WR","TROIKA_DIST","KREMLIN_DIST",distz_)]  %>% .[, var := c("b", "l", "u")] %>% data.table::melt(id.var = "var") %>% data.table::dcast(variable ~ var) %>% .[.N:1] %>% .[,ix := .I] %>% .[,label:=match(variable,labmat[,varz])%>%labmat[.,labz]] %>% .[,pch:=ifelse(variable%in%c("ln_N_ARREST_R","ln_N_EVENT_R"),16,1)]

###
# Make Figure A2.7 (balance)
###

w_ <- 6; h_ <- w_/1.618
png(paste0("results/figA27.png"),width=w_,height=h_,units="in",res=300)
par(mar=c(2,14,.5,.5))
plot(x=0,y=0,col=NA,bty="n",yaxt="n",xlab="",ylab="",ylim=G[,range(ix)],xlim=G[,c(l,u)%>%pretty()%>%range()])
axis(2,at=G[,ix],labels=G[,label],las=2)
segments(x0=G[,l],x1=G[,u],y0=G[,ix],y1=G[,ix],lwd=1,lty=1)
points(x=G[,b],y=G[,ix],pch=G[,pch])
segments(x0=0,x1=0,y0=0,y1=nrow(G)+1,lty=2)
segments(x0=G[,c(l,u)%>%range()]%>%pretty(),x1=G[,c(l,u)%>%range()]%>%pretty(),y0=0,y1=nrow(G)+1,lty=3,lwd=.25)
dev.off()


##################################
# Second stage
##################################

# Model specification
zonez <- paste0(grep("^ZONE_",names(X),value=TRUE),collapse="+")
tvarz <- c("N_EVENT_R","EVT_PCTR")

# Estimate model
rdd_mods <- lapply(1:length(tvarz),function(t0){
  yvarz <- c("N_TOTAL_R","N_DENREMREQ_R")
  model_rdd <- model_rddz(y=yvarz,x=tvarz[t0],logy=TRUE)
  out_rdd1 <- fixest::feols(model_rdd, data = g, weights = ~ wr, lean = TRUE, cluster=~RAY_NAME) ; fixest::etable(out_rdd1)
  rdd_mods1_ <- feols_coef(modz=out_rdd1,tvar=tvarz[t0],fez=NULL)%>%.[,model:="rdd"]
  rdd_mods1_
  yvarz <- c("DENREMREQ_PCTR","PLA_PCTR")  
  model_rdd <- model_rddz(y=yvarz,x=tvarz[t0],logy=FALSE)
  out_rdd2 <- fixest::feols(model_rdd, data = g, weights = ~ wr, lean = TRUE, cluster=~RAY_NAME) ; fixest::etable(out_rdd2)
  rdd_mods2_ <- feols_coef(modz=out_rdd2,tvar=tvarz[t0],fez=NULL)%>%.[,model:="rdd"]
  
  list(rdd_mods1_,rdd_mods2_)%>%data.table::rbindlist()
}) %>% data.table::rbindlist()

###
# Make Table A2.2
###

xtabz(modmat=rdd_mods,yz = "TOTAL|DENREMREQ_PCTR",xz = "EVENT",modz = "rdd",yordz = c("log(N_TOTAL_R + 1)","DENREMREQ_PCTR"),xordz = c("fit_log(N_EVENT_R + 1)","fit_log(EVT_PCTR + 1)"),capz = "\\textbf{Severity of repression and memorialization}, FRDD estimates.",labbz = "tab:rdd",fit_width=TRUE,subcaption="Estimates from fuzzy regression discontinuity design. Treatment is number of city block residents executed (logged). Outcome is log-transformed. Robust standard errors in parentheses, clustered by rayon. Observations (blocks) weighted by population size. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.",fn="results/tabA22.tex")

###
# Make Table A2.5
###

xtabz(modmat=rdd_mods,yz = "TOTAL|DENREMREQ_PCTR",xz = "EVT_PCTR",modz = "rdd",yordz = c("log(N_TOTAL_R + 1)","PLA_PCTR","DENREMREQ_PCTR","I(DENREMREQ_PCTR/100)"),xordz = c("fit_log(N_EVENT_R + 1)","fit_log(EVT_PCTR + 1)"),capz = "\\textbf{Per capita repression and memorialization}, FRDD estimates.",labbz = "tab:rdd_pc",fit_width=TRUE,subcaption="Estimates from fuzzy regression discontinuity design. Treatment is percent of city block residents executed (logged). Outcome is log-transformed. Robust standard errors in parentheses, clustered by rayon. Observations (blocks) weighted by population size. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.",fn="results/tabA25.tex")


##############################################################################
##############################################################################
##############################################################################
## Table A3.6
## Predictors of repression
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Custom functions
source("code/functions.R")

# Load data
X <- sf::read_sf("data/blocks/moscow1938_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)] 

##################################
# Bayesian model averaging
##################################

# Specify models
zonez <- paste0(grep("^ZONE_",names(X),value=TRUE),collapse="+")
zonez_ <- X %>% .[,which(lapply(.SD,sum)>1)%>%names(),.SDcols=grep("^ZONE_",names(.),value=TRUE)%>%.[!grepl("_bar$",.)]]
rayz_ <- X[,unique(RAY_NAME)%>%sort()]

# Extract data.frame
X_df <- X %>% data.table::copy() %>% .[,LPOP := log(POP_WR+1)] %>% .[,INDU_DIST := DIST2INDU/1000] %>% .[,AREA := AREA_RESI/1000000] %>% .[, eval(rayz_) := lapply(rayz_, function(x){1*(RAY_NAME == x)})] %>% .[,.SD,.SDcols=c("LPOP","TROIKA_DIST","INDU_DIST","LONG","LAT","AREA",rayz_[-1])] %>% as.data.frame()

# Fit linear
y <- X %>% .[,log(N_EVENT_R+1)] 
bma_mod_1 <- BMA::bic.glm(x=X_df,y=y,glm.family="gaussian",strict=FALSE,OR=10^3)
# Fit QP
y <- X %>% .[,N_EVENT_R] 
bma_mod_2 <- BMA::bic.glm(x=X_df,y=y,glm.family="quasipoisson",strict=FALSE,OR=10^3)

##################################
# Make Table A3.6
##################################

xtabz_bma(
  bma_mod_1,
  bma_mod_2,
  labmat=labmat,
  capz="\\textbf{Predictors of Repression}, Bayesian Model Averaging",
  labbz="app:tab_bma",
  festart="^Area",
  fit_width=TRUE,
  fn="results/tabA36.tex")



##############################################################################
##############################################################################
##############################################################################
## Tables A3.7
## Correlates of victim rehabilitation
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Custom functions
source("code/functions.R")

# Load data
X <- readRDS("data/individuals/memo_rehab.RDS") %>% .[,nationality_short:=nationality_short%>%match(labmat_cat[,varz])%>%labmat_cat[.,labz]] %>% .[,okonkh:=okonkh%>%match(labmat_cat[,varz])%>%labmat_cat[.,labz]] %>% .[,education:=education%>%match(labmat_cat[,varz])%>%labmat_cat[.,labz]]

##################################
# Model estimation
##################################

# Fit model
yvarz <- "rehab"
suppressMessages({
  formz <- Formula::Formula(formula(paste(yvarz," ~ i(education, ref = 'secondary') + i(nationality_short, ref='RUS') + i(okonkh,ref='government') + manager + p_clergy + p_military + dob_year | SSR")))
})
out_glm <- fixest::feglm(formz,data=X,family="binomial",nthreads=8,cluster=~SSR); fixest::etable(out_glm)

##################################
# Make table A3.7
##################################

# Export table
fixest::etable(out_glm,
  fitstat=c("n","bic"),
  dict=c(rehab="Rehabilitated",education="Education",nationality_short="Ethnicity",okonkh="Industry",manager="Manager",p_clergy="Clergy",p_military="Military",SSR="Republic of birth",dob_year="Birth year"),
  title="\\textbf{Who was most likely to receive rehabilitation?} Sample includes all repression victims in Memorial's `Victims of Stalinist Terror' database. Reference categories for education, ethnicity, industry and republic are `secondary,' `Russian,' `government' and `RSFSR.'",label="tab:rehab",coef.just="c",placement="H",
  notes="",signif.code=c("***"=0.001, "**"=0.01, "*"=0.05, "."=0.10),
  fontsize="scriptsize",tex=TRUE,se.below=FALSE,digits=2,replace=TRUE, family=TRUE, postprocess.tex = adjust_spacing,file="results/tabA37.tex")



##############################################################################
##############################################################################
##############################################################################
## Tables A3.8, A3.9
## Individual-level coefficients
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Custom functions
source("code/functions.R")

# Load data
X <- readRDS("data/individuals/moscow_individuals.RDS") 

##################################
# Repression of neighbors 
##################################

# Model specification
zonez <- paste0(grep("^ZONE_",names(X),value=TRUE),collapse="+")
yvarz <- c("LA_ANY_2","LA_DENREMREQ_2")
tvar <- "N_ARREST"

# Estimate models
suppressMessages({
  # OLS
  formz <- Formula::Formula(formula(paste("c(",paste("log(",yvarz,"+1)",collapse=", "),") ~ log(",tvar,"+1) + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
  out_1 <- fixest::feols(formz,data=X,cluster=~RESI_ID); fixest::etable(out_1)
  ind_mods1 <- feols_coef(out_1,tvar=tvar,fez=c("RAY_NAME","BLOK_ZONE","nationality_short","okonkh"))
  # GLM
  formz <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1) + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
  out_2 <- fixest::feglm(formz,data=X,family="binomial",cluster=~RESI_ID); fixest::etable(out_2)
  ind_mods2 <- feols_coef(out_2,tvar="N_ARREST",fez=c("RAY_NAME","BLOK_ZONE","nationality_short","okonkh"))
  ind_mods <- list(ind_mods1,ind_mods2)%>%data.table::rbindlist()
})

###
# Make Table A3.8
###

xtabz(modmat=ind_mods%>%.[fe1%in%"RAY_NAME"],yz = "LA_ANY|LA_DENREMREQ",xz = "N_ARREST",modz = "feols|feglm",yordz = c("log(LA_ANY_2 + 1)","LA_ANY_2","log(LA_DENREMREQ_2 + 1)","LA_DENREMREQ_2"),xordz = c("log(N_ARREST + 1)"),capz = "\\textbf{Severity of repression vs. neighbors and memorialization}, individual-level.",labbz = "tab:indiv",fit_width=TRUE,subcaption="Treatment is number of residents from same street address executed (logged). Robust standard errors in parentheses, clustered by rayon. Model specification in Equation \\ref{eq:glm}. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.",fn="results/tabA38.tex")

##################################
# Repression of coworkers
##################################

# Model specification
tvar <- "N_ARREST_WORK"

# Estimate models
suppressMessages({
  # OLS
  formz <- Formula::Formula(formula(paste("c(",paste("log(",yvarz,"+1)",collapse=", "),") ~ log(",tvar,"+1) + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
  out_1 <- fixest::feols(formz,data=X,cluster=~RESI_ID); fixest::etable(out_1)
  wind_mods1 <- feols_coef(out_1,tvar=tvar,fez=c("RAY_NAME","BLOK_ZONE","nationality_short","okonkh"))
  # GLM
  formz <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1) + dob_year + male + p_military + p_clergy + manager +", zonez, "| RAY_NAME + nationality_short + party + okonkh")))
  out_2 <- fixest::feglm(formz,data=X,family="binomial",cluster=~RESI_ID); fixest::etable(out_2)
  wind_mods2 <- feols_coef(out_2,tvar="N_ARREST",fez=c("RAY_NAME","BLOK_ZONE","nationality_short","okonkh"))
  wind_mods <- list(wind_mods1,wind_mods2)%>%data.table::rbindlist()
})

###
# Make Table A3.9
###

xtabz(modmat=wind_mods,yz = "LA_ANY|LA_DENREMREQ",xz = "N_ARREST_WORK",modz = "feols|feglm",yordz = c("log(LA_ANY_2 + 1)","LA_ANY_2","log(LA_DENREMREQ_2 + 1)","LA_DENREMREQ_2"),xordz = c("log(N_ARREST + 1)"),capz = "\\textbf{Severity of repression vs. co-workers and memorialization}, individual-level.",labbz = "tab:windiv",fit_width=TRUE,subcaption="Treatment is number of repression with same employer as victim (logged). See note under Table \\ref{tab:indiv} for details.",fn="results/tabA39.tex")




##############################################################################
##############################################################################
##############################################################################
## Figures A3.14, A3.15
## Coordination costs
##############################################################################
##############################################################################
##############################################################################

rm(list=ls())

##################################
# Prepare environment
##################################

# Custom functions
source("code/functions.R")

# Load data
X <- sf::read_sf("data/blocks/moscow1938_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY_NAME)] %>% .[!is.na(wr)] %>%  .[,paste0(grep("^pbl_",names(.),value=TRUE),"_b"):=lapply(.SD,function(x){1*(x>0)}),.SDcols=grep("^pbl",names(.))] %>% 
  .[,corpsize_b:=1*(corp_size_p1>median(corp_size_p1,na.rm=TRUE))] %>% 
  .[,residential_b:=1*(sector_1_p1+sector_2_p1==0)] %>% 
  .[,c("long","lat"):=geometry%>%sf::st_centroid()%>%sf::st_coordinates()%>%data.table::as.data.table()]

##################################
# Model estimation and bootstrap
##################################

# Model specification
yvarz <- c("N_TOTAL_R","DENREMREQ_PCTR")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_EVENT_R"
ixvarz <- X %>% names(.) %>% grep("^corpsize|^residential",.,value=TRUE) %>% grep("_b$",.,value=TRUE)
ix_list <- lapply(seq_along(ixvarz),function(ix){c(ixvar=ixvarz[ix],lab0=c("Independently-owned firm","Non-residential")[ix],lab1=c("Corporate ownership","Residential block")[ix],xlab_="Number of city block residents executed")}) #%>% 

# Set parameters
n_sim <- 1000
x_len <- 1000

# Loop over interactions
for(ii0 in seq_along(ixvarz)){

  tryCatch({
    suppressMessages({

      # Parametric bootstrap SE
      {

        ixvar <- ixvarz[ii0]

        # Model 1
        formz_base <- Formula::Formula(formula(paste0("c(",paste0("log(",yvarz%>%.[-length(.)],"+1)",collapse=","),",DENREMREQ_PCTR) ~ log(",tvar,"+1)*",ixvar," + TROIKA_DIST + ", zonez, "+ splines::bs(LONG)*splines::bs(LAT) | RAY_NAME")))
        out_base <- fixest::feglm(formz_base, data = X, weights = ~ wr, lean = FALSE, se = "HC1", family="gaussian") ; fixest::etable(out_base)
        # Model 2
        formz_base2 <- Formula::Formula(paste0("c(",paste0(yvarz[length(yvarz)],"",collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + TROIKA_DIST  + ",zonez," + splines::ns(LONG,3)+splines::ns(LAT,3)|  RAY_NAME  ")%>%as.formula())
        out_base2 <- fixest::feglm(formz_base2, data = X, weights = ~ wr, lean = FALSE, family="gaussian", vcov = "HC1") ; fixest::etable(out_base2)
       
        # Hypothetical x values
        xz <- X[,seq(min(N_EVENT_R,na.rm=TRUE),max(N_EVENT_R,na.rm=TRUE),length.out=x_len)]
        X_new <- X %>% .[,.SD,.SDcols=out_base[[1]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100|\\*100|100\\)|\\,\\ \\d{1}|c\\(|splines\\:\\:bs\\(|splines\\:\\:ns\\(|\\)|^\\~","",.)%>%stringr::str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)] %>% .[,ZONE_GOVE:=1]  %>% .[,c("N_EVENT","N_EVENT_R"):=xz]
        # Generate predictions, sampling coefficients from multivariate Normal distro
        pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
        set.seed(123); y_predz <- lapply(1:n_sim,function(b0){#print(b0)
          setTxtProgressBar(pb, b0)
          # Sample coefficients
          out_copy <- data.table::copy(out_base)
          for(yy0 in 1:length(out_copy)){
            out_copy[[yy0]][["coefficients"]] <- mvtnorm::rmvnorm(n=100,mean=out_copy[[yy0]][["coefficients"]]%>%unlist(),sigma=out_copy[[yy0]]%>%.[["cov.scaled"]])%>%apply(2,median)
          }
          out_copy2 <- data.table::copy(out_base2)
          lengths(out_copy2)
          out_copy2[["coefficients"]] <- mvtnorm::rmvnorm(n=100,mean=out_copy2[["coefficients"]]%>%unlist(),sigma=out_copy2[["cov.scaled"]],method="svd")%>%apply(2,mean)
          # Predictions
          yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new %>% .[,eval(ixvar):=0],type="response") %>% (function(x){exp(x)-1})
          yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new %>% .[,eval(ixvar):=1],type="response") %>% (function(x){exp(x)-1})
          yz_pred_bs_2_0 <- predict(out_copy2,newdata=X_new %>% .[,eval(ixvar):=0],type="response")
          yz_pred_bs_2_1 <- predict(out_copy2,newdata=X_new %>% .[,eval(ixvar):=1],type="response")
          rm(out_copy)
          list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
        }) 
        suppressMessages({
          y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
          y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
          y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
          y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
        })

        ###
        # Figures
        ###

        # Log scale
        {
          x_ <- log(xz+1)
          xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
          atz <- log(xlabz+1)
          rugz <- X[,log(get(tvar))]
          lab0 <- ix_list[[ii0]]["lab0"]
          lab1 <- ix_list[[ii0]]["lab1"]
        }
        w_ <- 6; h_ <- w_/1.618
        # Make Figure a
        png(paste0("results/figA",c("314","315")[ii0],"a.png"),width=w_,height=h_,units="in",res=300)
        {
          par(mar=c(4,3,0.5,0.5))
          plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Number of city block residents executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
          polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
          lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
          polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
          lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
          axis(2,las=2)
          axis(1,at=atz,labels=xlabz)
          suppressWarnings({
            rug(jitter(rugz,amount=.25),lwd=.25)
          })
          legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
        }
        dev.off()
       

        # Make Figure b
        png(paste0("results/figA",c("314","315")[ii0],"b.png"),width=w_,height=h_,units="in",res=300)
        {
          par(mar=c(4,3,0.5,0.5))
          plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1))))%>%(function(x){x[x<0]<-0;x[x>150]<-150;x}),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Number of city block residents executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
          polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
          lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
          polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
          lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
          axis(2,las=2)
          axis(1,at=atz,labels=xlabz)
          suppressWarnings({
            rug(jitter(rugz,amount=.25),lwd=.25)
          })
        }
        dev.off()
      }

    })
  },error=function(e){print(e)})
}


##############################################################################
##############################################################################
##############################################################################
## Table A4.11
## Figures A4.17, A4.18, A4.19, A4.20, A4.21
## Saint Petersburg
##############################################################################
##############################################################################
##############################################################################


##################################
# Maps of Leningrad (Figure A4.17)
##################################

rm(list=ls())

# Load data
len1936_districts <- sf::read_sf("data/blocks/leningrad1936_districts.geojson")
len1935_blocks <- sf::read_sf("data/blocks/leningrad1935_blocks.geojson")
bb <- len1936_districts %>% data.table::as.data.table()  %>% .[,sf::st_as_sf(geometry) %>% SUNGEO::update_bbox() %>% sf::st_bbox()]
suppressWarnings({
  suppressMessages({
    arrests <- readRDS("data/individuals/leningrad_individuals.RDS") %>% sf::st_as_sf(coords=c("osm_long","osm_lat"),na.fail=FALSE,remove=FALSE,crs=4326) %>% sf::st_intersection(bb%>%sf::st_as_sfc())
  })
})

# Figure a
w_ <- 3.5; h_ <- w_
png("results/figA417a.png",width=w_,height=h_,units="in",res=300)
par(mar=c(0,0,0,0))
plot(0,0,xlim=bb[c("xmin","xmax")],ylim=bb[c("ymin","ymax")],col=NA,bty="n",xaxt="n",yaxt="n")
plot(len1935_blocks["geometry"],col="grey80",border=NA,lty=1,add=TRUE,lwd=.5)
plot(len1936_districts["geometry"],col=NA,border="black",lty=1,add=TRUE,lwd=.5)
dev.off()

# Figure b
w_ <- 3.5; h_ <- w_
png("results/figA417b.png",width=w_,height=h_,units="in",res=300)
par(mar=c(0,0,0,0))
plot(len1935_blocks["geometry"],xlim=bb[c("xmin","xmax")],ylim=bb[c("ymin","ymax")],col="grey80",border=NA,bty="n",xaxt="n",yaxt="n",reset=FALSE)
plot(arrests["geometry"],add=TRUE,col="black",pch=16,cex=.2)
dev.off()



##################################
# Block-level analysis
##################################

rm(list=ls())

###
# Prepare environment
###

# Custom functions
source("code/functions.R")

# Prepare data
X <- sf::read_sf("data/blocks/leningrad1935_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY1936_NAME)] %>% .[!is.na(wr)] 

###
# Model estimation
###

# Estimate models
zonez <- paste0(grep("^ZONE_",names(X),value=TRUE),collapse="+")
tvarz <- c("N_EVENT_R","EVT_PCTR")
ols_mods <- lapply(1:length(tvarz),function(t0){
  suppressMessages({
    # Linear
    model_ols <- Formula::Formula(paste0("c(log(N_TOTAL_R+1),DENREMREQ_PCTR) ~ log(",tvarz[t0],"+1) + ",zonez," + splines::bs(LONG)*splines::bs(LAT)|  RAY1936_NAME  ")%>%as.formula())
    out_ols <- fixest::feols(model_ols, data = X, weights = ~ wr, lean = TRUE, cluster = ~ RAY1936_NAME) ; fixest::etable(out_ols)
    ols_mods1 <- feols_coef(out_ols,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE"))
    # Binomial
    model_glm <- Formula::Formula(paste0("c(DENREMREQ_PCTR/100,DENREMREQ_PCTR/100) ~ log(",tvarz[t0],"+1)  + ",zonez," + splines::bs(LONG)*splines::bs(LAT)|  RAY1936_NAME  ")%>%as.formula())
    out_glm <- fixest::feglm(model_glm, data = X, weights = ~ wr, lean = FALSE, family="binomial", cluster = ~ RAY1936_NAME) ; fixest::etable(out_glm)
    glm_mods1 <- feols_coef(out_glm,tvar=tvarz[t0],fez=c("RAY_NAME","BLOK_ZONE"))  
    list(ols_mods1,glm_mods1)%>%data.table::rbindlist()%>%unique()
  })
})%>%data.table::rbindlist()

###
# Table A4.11
###

xtabz(modmat=ols_mods,yz = "TOTAL|DENREMREQ_PCTR",xz = "EVENT",modz = "feols|feglm",yordz = c("log(N_TOTAL_R + 1)","DENREMREQ_PCTR","DENREMREQ_PCTR/100"),xordz = c("log(N_EVENT_R + 1)","log(EVT_PCTR + 1)"),capz = "\\textbf{Severity of repression and memorialization} (Saint Petersburg).",labbz = "tab:main_spb",fit_width=TRUE,drop_stats="aic",subcaption="Estimates from Linear and Binomial fixed effect regression models. Treatment is number of city block residents executed (logged). Outcome is log-transformed in Linear model, rescaled as proportion between 0 and 1 in Binomial model. Robust standard errors in parentheses, clustered by rayon. All models include spatial spline and block-level covariates. Observations (blocks) weighted by population size. Significance levels (two-tailed): $^{\\dagger} p < 0.1$; $^{*} p < 0.05$; $^{**} p < 0.01$.",fn="results/tabA411.tex")


##################################
# Individual-level analysis
##################################

rm(list=ls())

###
# Prepare environment
###

# Custom functions
source("code/functions.R")

# Load data
X <- readRDS("data/individuals/leningrad_individuals.RDS")


###
# Model estimation and bootstrap
###

# Model specification
yvarz <- c("LA_ANY","LA_DENREMREQ")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_EVENT_R"
xlab_<-"Neighbors executed"
suppressMessages({
  formz <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1) + male + ",zonez," + manager + p_clergy + p_military  | RAY1936 + nationality_short + okonkh + party")))
})

# Fit model
out_ <- fixest::feglm(formz,data=X ,family="binomial",vcov="HC1",nthreads=1); fixest::etable(out_)

# Set parameters
n_sim <- 1000
x_len <- 1000
xz <- X[,seq(0,max(get(tvar)),length.out=x_len)]

# Parametric bootstrap SE
{

  # Hypothetical x values
  X_new <- X %>% .[,.SD,.SDcols=out_[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
  # Generate predictions, sampling coefficients from multivariate Normal distro
  pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
  # b0 <- 1
  set.seed(123); y_predz <- lapply(1:n_sim,function(b0){#print(b0)
    setTxtProgressBar(pb, b0)
    out_copy <- data.table::copy(out_)
    out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma={out_copy[[1]]%>%.[["cov.scaled"]]},method="svd")%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    # %>%apply(2,mean)
    out_copy[[2]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[2]][["coefficients"]]%>%unlist(),sigma={out_copy[[2]]%>%.[["cov.scaled"]]},method="svd")%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    # %>%apply(2,mean)
    yz_pred_bs_1 <- predict(out_copy[[1]],newdata=X_new,type="response")
    yz_pred_bs_2 <- predict(out_copy[[2]],newdata=X_new,type="response")
    rm(out_copy)
    # list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
    list(LA_ANY=yz_pred_bs_1,LA_DENREMREQ=yz_pred_bs_2)
  }) 
  suppressMessages({
    y_predz_1 <- y_predz %>% .[lapply(y_predz,length)==2] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2 <- y_predz %>% .[lapply(y_predz,length)==2] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
  })
}


###
# Figure A4.18
###

# Log scale
{
  x_ <- log(xz+1)
  xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
  atz <- log(xlabz+1)
  rugz <- X[,log(get(tvar))]
}
w_ <- 6; h_ <- w_/1.618

# Figure a
png("results/figA418a.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_1[,apply(.SD,1,median)],ylim=range(pretty(quantile(as.matrix(y_predz_1),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab=xlab_,xaxt="n",xlim=c(0,max(x_)),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col="grey90")
  lines(x=x_,y=y_predz_1[,apply(.SD,1,median)],lwd=1.618)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()

# Figure 3b
png("results/figA418b.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_2[,apply(.SD,1,median)],ylim=range(pretty(quantile(as.matrix(y_predz_2),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab=xlab_,xaxt="n",xlim=c(0,max(x_)),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col="grey90")
  lines(x=x_,y=y_predz_2[,apply(.SD,1,median)],lwd=1.618)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()



##################################
# Victim identity
##################################

rm(list=ls())

###
# Prepare environment
###

# Custom functions
source("code/functions.R")

# Load data
X <- readRDS("data/individuals/leningrad_individuals.RDS")

###
# Model estimation and bootstrap
###

# Model specification
yvarz <- c("LA_ANY","LA_DENREMREQ")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_EVENT_100"
tvar <- "N_EVENT_R"
xlab_<-"Neighbors executed"
xz <- X[,max(get(tvar),na.rm=TRUE)] %>% (function(x){seq(0,x,length.out=100)})

# Set parameters
n_sim <- 1000
x_len <- 1000

# Parametric bootstrap SE
{

  ixvar <- "eth_nonrus"
  fevar <- "nationality_short"

  # GLM
  suppressMessages({
    formz_base <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + male + ",zonez," | RAY1936 + nationality_short + okonkh + party")))
  })
  out_base <- fixest::feglm(formz_base,data=X,family="binomial",vcov="HC1"); fixest::etable(out_base)

  # FE values
  fez <- data.table(nat=fixest::fixef(out_base[[1]])[[fevar]]%>%names(),fe1=fixest::fixef(out_base[[1]])[[fevar]]) %>% .[,fe2:=match(nat,names(fixest::fixef(out_base[[2]])[[fevar]]))%>%fixest::fixef(out_base[[2]])[[fevar]][.]]%>%.[nat!=""]%>%na.omit()
  ix_list <- c(ixvar=ixvar,ixcon0_1="RUS",ixcon1_1=fez[order(fe1-fe1[nat=="RUS"]),][nat!="RUS",nat][1],ixcon0_2="RUS",ixcon1_2=fez[order(fe2-fe2[nat=="RUS"])%>%rev()%>%.[-1],][nat!="RUS",nat][1],lab0="Russian victim",lab1="Non-Russian victim",fevar=fevar)

  # Get interaction variables
  ixcon0_1 <- ix_list["ixcon0_1"]
  ixcon1_1 <- ix_list["ixcon1_1"]
  ixcon0_2 <- ix_list["ixcon0_2"]
  ixcon1_2 <- ix_list["ixcon1_2"]
  lab0 <- ix_list["lab0"]
  lab1 <- ix_list["lab1"]
  
  # Hypothetical x values
  xz <- X[,seq(0,max(get(tvar)),length.out=x_len)]
  X_new <- X %>% .[,.SD,.SDcols=out_base[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
  # Generate predictions, sampling coefficients from multivariate Normal distro
  pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
  set.seed(123); y_predz <- lapply(1:n_sim,function(b0){#print(b0)
    setTxtProgressBar(pb, b0)
    out_copy <- data.table::copy(out_base)
    out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma={out_copy[[1]]%>%.[["cov.scaled"]]},method="chol")%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    out_copy[[2]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[2]][["coefficients"]]%>%unlist(),sigma={out_copy[[2]]%>%.[["cov.scaled"]]},method="svd")%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=0]%>%.[,eval(fevar):=ixcon0_1],type="response")
    yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=1]%>%.[,eval(fevar):=ixcon1_1],type="response")
    yz_pred_bs_2_0 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=0]%>%.[,eval(fevar):=ixcon0_2],type="response")
    yz_pred_bs_2_1 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=1]%>%.[,eval(fevar):=ixcon1_2],type="response")
    rm(out_copy)
    list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
  }) 
  suppressMessages({
    y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
  })
}

###
# Figure A4.19
###

{
  x_ <- log(xz+1)
  xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
  atz <- log(xlabz+1)
  rugz <- X[,log(get(tvar))]
}

# Make Figure a
w_ <- 6; h_ <- w_/1.618
png("results/figA419a.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
  lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
  lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
  legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
}
dev.off()

# Make Figure b
png("results/figA419b.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1),na.rm=TRUE))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
  lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
  lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()



##################################
# Victim social status
##################################

rm(list=ls())

###
# Prepare environment
###

# Custom functions
source("code/functions.R")

# Load data
X <- readRDS("data/individuals/leningrad_individuals.RDS")

###
# Model estimation and bootstrap
###

# Model specification
yvarz <- c("LA_ANY","LA_DENREMREQ")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_EVENT_100"
tvar <- "N_EVENT_R"
xlab_<-"Neighbors executed"

# Set parameters
n_sim <- 1000
x_len <- 1000

# Parametric bootstrap SE
{
  ii0 <- 1
  ixvar <- c("educ_higher")[ii0]
  fevar <- "nationality_short"

  # GLM
  suppressMessages({
    formz_base <- Formula::Formula(formula(paste("c(",paste(yvarz,collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + male + ",zonez," | RAY1936 + nationality_short + okonkh + party")))
  })
  out_base <- fixest::feglm(formz_base,data=X,family="binomial",vcov="HC1"); fixest::etable(out_base)

  # FE values
  fez <- data.table(nat=fixest::fixef(out_base[[1]])[[fevar]]%>%names(),fe1=fixest::fixef(out_base[[1]])[[fevar]]) %>% .[,fe2:=match(nat,names(fixest::fixef(out_base[[2]])[[fevar]]))%>%fixest::fixef(out_base[[2]])[[fevar]][.]]%>%.[nat!=""]
  ix_list <- c(ixvar=ixvar,ixcon0_1="рус",ixcon1_1=fez[which.min(fe1-fe1[nat=="рус"]),nat],ixcon0_2="рус",ixcon1_2=fez[which.max(fe2-fe2[nat=="рус"]),nat],lab0=c("Secondary education or less")[ii0],lab1=c("University education")[ii0],fevar=fevar)

  # Get interaction variables
  ixcon0_1 <- ix_list["ixcon0_1"]
  ixcon1_1 <- ix_list["ixcon1_1"]
  ixcon0_2 <- ix_list["ixcon0_2"]
  ixcon1_2 <- ix_list["ixcon1_2"]
  lab0 <- ix_list["lab0"]
  lab1 <- ix_list["lab1"]
  
  # Hypothetical x values
  xz <- X[,seq(0,max(get(tvar)),length.out=x_len)]
  X_new <- X %>% .[,.SD,.SDcols=out_base[[2]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100\\)|c\\(|splines\\:\\:bs\\(|\\)|^\\~","",.)%>%str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% .[rep(1,x_len)]  %>% .[,eval(tvar):=xz]
  # Generate predictions, sampling coefficients from multivariate Normal distro
  pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
  # b0 <- 1
  set.seed(123); y_predz <- lapply(1:n_sim,function(b0){#print(b0)
    setTxtProgressBar(pb, b0)
    out_copy <- data.table::copy(out_base)
    out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma={out_copy[[1]]%>%.[["cov.scaled"]]},method="chol")%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    # %>%apply(2,mean)
    out_copy[[2]][["coefficients"]] <- mvtnorm::rmvnorm(n=1000,mean=out_copy[[2]][["coefficients"]]%>%unlist(),sigma={out_copy[[2]]%>%.[["cov.scaled"]]},method="svd")%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    # %>%apply(2,mean)
    yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=0],type="response")
    yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new%>%.[,eval(ixvar):=1],type="response")
    yz_pred_bs_2_0 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=0],type="response")
    yz_pred_bs_2_1 <- predict(out_copy[[2]],newdata=X_new%>%.[,eval(ixvar):=1],type="response")
    rm(out_copy)
    list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
  }) 
  suppressMessages({
    y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
  })
}


###
# Figure A4.20
###

{
  x_ <- log(xz+1)
  xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
  atz <- log(xlabz+1)
  rugz <- X[,log(get(tvar))]
}
w_ <- 6; h_ <- w_/1.618

# Make Figure a
png("results/figA420a.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)),c(0,1)))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
  lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
  lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
  legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
}
dev.off()

# Make Figure b
png("results/figA420b.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1),na.rm=TRUE))),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Neighbors executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
  lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
  lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.1)
  })
}
dev.off()


##################################
# Local political opportunity structure
##################################

rm(list=ls())

###
# Prepare environment
###

# Custom functions
source("code/functions.R")

# Load data
X <- sf::read_sf("data/blocks/leningrad1935_blocks.geojson") %>% data.table::as.data.table()  %>% .[!is.na(RAY1936_NAME)] %>%  .[,paste0(grep("^pbl_sec_p3",names(.),value=TRUE),"_b"):=lapply(.SD,function(x){1*(x>0)}),.SDcols=grep("^pbl_sec_p3",names(.))] 

###
# Model estimation and bootstrap
###

# Model specification
yvarz <- c("N_TOTAL_R","DENREMREQ_PCTR")
zonez <- X %>% .[!is.na(get(yvarz[2])),grep("^ZONE_",names(.),value=TRUE)[apply(.SD,2,sum,na.rm=TRUE)>0],.SDcols=grep("^ZONE_",names(.),value=TRUE)]%>%paste0(collapse="+")
tvar <- "N_EVENT_R"
ixvarz <- X %>% names(.) %>% grep("^pbl_sec_p3",.,value=TRUE) %>% grep("_b$",.,value=TRUE) #%>% paste0("I(",.,"*100)")
ix_list <- lapply(ixvarz,function(ix){c(ixvar=ix,lab0="No state security presence",lab1="State security presence",xlab_="Number of city block residents executed")})

# Set parameters
n_sim <- 1000
x_len <- 1000

# Parametric bootstrap SE
{

  ii0 <- 1
  ixvar <- ixvarz[ii0]

  # Models
  suppressMessages({
    formz_base <- Formula::Formula(formula(paste0("c(log(",yvarz[1],"+1),log(",yvarz[2],"+1)) ~ log(",tvar,"+1)*",ixvar," + ", zonez, "+ splines::bs(LONG)*splines::bs(LAT) | RAY1936_NAME")))
  })
  out_base <- fixest::feglm(formz_base, data = X, weights = ~ wr, lean = FALSE, se = "HC1", family="gaussian") ; fixest::etable(out_base)
  suppressMessages({
    formz_base2 <- Formula::Formula(paste0("c(",paste0(yvarz[2],"/100",collapse=", "),") ~ log(",tvar,"+1)*",ixvar," + ",zonez," + splines::ns(LONG,2)*splines::ns(LAT,2)|  RAY1936_NAME  ")%>%as.formula())
  })
  out_base2 <- fixest::feglm(formz_base2, data = X, weights = ~ wr, lean = FALSE, family="quasibinomial", vcov = "HC1") ; fixest::etable(out_base2)
  # Hypothetical x values
  xz <- X[,seq(min(N_EVENT_R,na.rm=TRUE),max(N_EVENT_R,na.rm=TRUE),length.out=x_len)]
  X_new <- X %>% .[,.SD,.SDcols=out_base[[1]][["fml_all"]]%>%as.character()%>%gsub("log\\(| \\+ 1\\)|I\\(|\\/100|\\*100|100\\)|\\,\\ \\d{1}|c\\(|splines\\:\\:bs\\(|splines\\:\\:ns\\(|\\)|^\\~","",.)%>%stringr::str_split(" \\+ | \\* | \\~ ")%>%unlist()%>%unique()%>%.[nchar(.)>0]]%>%.[,lapply(.SD,function(x){ifelse(is.numeric(x),median(x,na.rm=TRUE),table(x)%>%sort()%>%rev()%>%names()%>%.[1])})] %>% 
    .[,RAY1936_NAME:=fixest::fixef(out_base2)$RAY1936_NAME%>%which.min()%>%names()] %>% 
    .[rep(1,x_len)]  %>% 
    .[,c("N_EVENT","N_EVENT_R"):=xz] %>% 
    .[,c("LONG","LAT"):=X[RAY1936_NAME%in%.[,RAY1936_NAME[1]],lapply(.SD,mean),.SDcols=c("LONG","LAT")]]
   
  # Generate predictions, sampling coefficients from multivariate Normal distro
  pb <- txtProgressBar(min = 0, max = n_sim, style = 3)
  set.seed(123); y_predz <- lapply(1:n_sim,function(b0){#print(b0)
    setTxtProgressBar(pb, b0)
    # Sample coefficients
    out_copy <- data.table::copy(out_base)
    out_copy[[1]][["coefficients"]] <- mvtnorm::rmvnorm(n=100,mean=out_copy[[1]][["coefficients"]]%>%unlist(),sigma=out_copy[[1]]%>%.[["cov.scaled"]])%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    out_copy2 <- data.table::copy(out_base2)
    out_copy2[["coefficients"]] <- mvtnorm::rmvnorm(n=100,mean=out_copy2[["coefficients"]]%>%unlist(),sigma=out_copy2[["cov.scaled"]],method="svd")%>%apply(2,function(x){pdrop(x)%>%sample()})%>%.[1,]
    # Predictions
    yz_pred_bs_1_0 <- predict(out_copy[[1]],newdata=X_new %>% .[,eval(ixvar):=0],type="response") %>% (function(x){exp(x)-1})
    yz_pred_bs_1_1 <- predict(out_copy[[1]],newdata=X_new %>% .[,eval(ixvar):=1],type="response") %>% (function(x){exp(x)-1})
    yz_pred_bs_2_0 <- predict(out_copy2,newdata=X_new %>% .[,eval(ixvar):=0],type="response") %>% (function(x){x*100})
    yz_pred_bs_2_1 <- predict(out_copy2,newdata=X_new %>% .[,eval(ixvar):=1],type="response") %>% (function(x){x*100})
    rm(out_copy)
    list(Y1_0=yz_pred_bs_1_0,Y1_1=yz_pred_bs_1_1,Y2_0=yz_pred_bs_2_0,Y2_1=yz_pred_bs_2_1)
  }) 
  suppressMessages({
    y_predz_1_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",1) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_1_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",2) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2_0 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",3) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
    y_predz_2_1 <- y_predz %>% .[lapply(y_predz,length)==4] %>% sapply("[",4) %>% .[lapply(.,length)==length(xz)] %>% dplyr::bind_cols() %>% data.table::as.data.table()
  })
}

###
# Figure A4.21
###

# Log scale
{
  x_ <- log(xz+1)
  xlabz <- pretty(exp(x_)-1)%>%c(.,1,2,3,4,5,10,25)%>%sort()
  atz <- log(xlabz+1)
  rugz <- X[,log(get(tvar))]
  lab0 <- ix_list[[ii0]]["lab0"]
  lab1 <- ix_list[[ii0]]["lab1"]
}
w_ <- 6; h_ <- w_/1.618

# Make Figure a
png("results/figA421a.png",width=w_,height=h_,units="in",res=300)
{
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],ylim=range(pretty(quantile(unlist(c(y_predz_1_0,y_predz_1_1)),c(0,1))))%>%(function(x){x[x< -1]<- -1;x}),type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Number of city block residents executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
  lines(x=x_,y=y_predz_1_0[,apply(.SD,1,median)],lwd=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_1_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_1_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
  lines(x=x_,y=y_predz_1_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
  axis(2,las=2)
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.25)
  })
  legend(x="topleft",lty=c(1,2),lwd=1.618,bty="n",legend=c(lab0,lab1),cex=1.618)
}
dev.off()

# Make Figure b
png("results/figA421b.png",width=w_,height=h_,units="in",res=300)
{
  yran <- range(pretty(quantile(unlist(c(y_predz_2_0,y_predz_2_1)),c(0,1))))%>%(function(x){x[x<0]<-0;x[x>250]<-250;x})
  par(mar=c(4,3,0.5,0.5))
  plot(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],ylim=yran,type="l",col=NA,bty="n",yaxt="n",ylab="",xlab="Number of city block residents executed",xaxt="n",xlim=range(x_),cex.lab=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2_0[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_0[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.85,.85,.85,.5))
  lines(x=x_,y=y_predz_2_0[,apply(.SD,1,median)],lwd=1.618)
  polygon(x=c(x_,rev(x_)),y=c(y_predz_2_1[,apply(.SD,1,function(x){median(x)+1.96*sd(x)})],rev(y_predz_2_1[,apply(.SD,1,function(x){median(x)-1.96*sd(x)})])),border=NA,col=rgb(.7,.7,.7,.5))
  lines(x=x_,y=y_predz_2_1[,apply(.SD,1,median)],lwd=1.618,lty=2)
  axis(2,las=2,at=yran%>%pretty(),labels={yran%>%pretty()})
  axis(1,at=atz,labels=xlabz)
  suppressWarnings({
    rug(jitter(rugz,amount=.25),lwd=.25)
  })
}
dev.off()






##############################################################################
##############################################################################

# Print upon finishing
print("**** Finished run2_main.R ****")
